#include "cuda_runtime_wrapper.h"
#include "cuda_context_wrapper.h"
#include "mc_runtime_api.h"
#include "mcr/mc_runtime_api_deprecated.h"
#include <limits>
#include <map>

#define ACTIVE_CONTEXT()   g_hasContextBeenCreated = true;
#define DEACTIVE_CONTEXT() g_hasContextBeenCreated = false;
thread_local mcError_t g_last_error = mcSuccess;

#define WCUDA_RETURN(ret, ...)                                                                     \
    do {                                                                                           \
        mcError_t error  = (ret);                                                                  \
        auto launchError = mcGetLaunchKernelError();                                               \
                                                                                                   \
        if (mcSuccess != error && mcErrorNotReady != error) {                                      \
            g_last_error = error;                                                                  \
        } else if (launchError != mcSuccess) {                                                     \
            g_last_error = launchError;                                                            \
        }                                                                                          \
        mcResetLaunchKernelError();                                                                \
        if (g_hasContextBeenCreated && !mcGetPrimaryCtxBindState()) {                              \
            mcSetPrimaryCtxBindState();                                                            \
        }                                                                                          \
        return error;                                                                              \
    } while (0);

mcError_t wcudaGetLastErrorImpl()
{
    auto launchError = mcGetLaunchKernelError();
    mcError_t err    = (launchError != mcSuccess) ? launchError : g_last_error;
    g_last_error  = mcSuccess;
    mcResetLaunchKernelError();
    return err;
}

mcError_t wcudaPeekAtLastErrorImpl()
{
    auto launchError = mcGetLaunchKernelError();
    mcError_t err    = (launchError != mcSuccess) ? launchError : g_last_error;
    return err;
}
//---------------------------------------------------------------------------//
// Device Management
//---------------------------------------------------------------------------//

mcError_t wcudaGetDevice(int *deviceId) { WCUDA_RETURN(mcGetDevice(deviceId)); }

mcError_t wcudaSetDevice(int deviceId)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcSetDevice(deviceId));
}

mcError_t wcudaGetDeviceFlags(unsigned int *flags) { WCUDA_RETURN(mcGetDeviceFlags(flags)); }

mcError_t wcudaSetDeviceFlags(unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcSetDeviceFlags(flags));
}

mcError_t wcudaGetDeviceCount(int *count) { WCUDA_RETURN(mcGetDeviceCount(count)); }

mcError_t wcudaGetDeviceProperties(mcDeviceProp_t *prop, int device)
{
    if (prop == nullptr) {
        WCUDA_RETURN(mcErrorInvalidValue);
    }

    auto ret = mcGetDeviceProperties(prop, device);
    if (ret != mcSuccess) {
        WCUDA_RETURN(ret);
    }
    if (__NV_ARCH_V100 == __nv_arch_type_internal__) {
        prop->major = 7;
        prop->minor = 0;
    } else if (__NV_ARCH_T4 == __nv_arch_type_internal__) {
        prop->major = 7;
        prop->minor = 5;
    } else if (__NV_ARCH_A100 == __nv_arch_type_internal__) {
        prop->major = 8;
        prop->minor = 0;
    }

    char *var            = getenv("WARPSIZE");
    prop->warpSize       = (var != nullptr) ? atoi(var) : prop->warpSize;
    prop->waveSize       = prop->warpSize;
    var                  = getenv("TOTALMEMORY");
    prop->totalGlobalMem = (var != nullptr) ? (size_t)strtoul(var, 0, 0) : prop->totalGlobalMem;
    WCUDA_RETURN(ret);
}

mcError_t wcudaDeviceGetAttribute(int *value, enum mcDeviceAttribute_t attr, int device)
{
    if (value == nullptr) {
        WCUDA_RETURN(mcErrorInvalidValue);
    }
    if ((attr < mcDeviceAttributeMaxThreadsPerBlock) || (attr >= mcDeviceAttributeUnknow)) {
        WCUDA_RETURN(mcErrorInvalidValue);
    }
    mcDeviceProp_t prop = {};
    mcError_t err       = wcudaGetDeviceProperties(&prop, device);
    if (err != mcSuccess) {
        WCUDA_RETURN(err);
    }

    int count;
    mcGetDeviceCount(&count);
    if (device < 0 || device >= count) {
        WCUDA_RETURN(mcErrorInvalidDevice);
    }

    switch (attr) {
    case mcDeviceAttributeMaxThreadsPerBlock:
        *value = prop.maxThreadsPerBlock;
        break;
    case mcDeviceAttributeMaxBlockDimX:
        *value = prop.maxThreadsDim[0];
        break;
    case mcDeviceAttributeMaxBlockDimY:
        *value = prop.maxThreadsDim[1];
        break;
    case mcDeviceAttributeMaxBlockDimZ:
        *value = prop.maxThreadsDim[2];
        break;
    case mcDeviceAttributeMaxGridDimX:
        *value = prop.maxGridSize[0];
        break;
    case mcDeviceAttributeMaxGridDimY:
        *value = prop.maxGridSize[1];
        break;
    case mcDeviceAttributeMaxGridDimZ:
        *value = prop.maxGridSize[2];
        break;
    case mcDeviceAttributeMaxSharedMemoryPerBlock:
        *value = prop.sharedMemPerBlock;
        break;
    case mcDeviceAttributeTotalConstantMemory:
        *value = prop.totalConstMem;
        break;
    case mcDeviceAttributeWarpSize:
    case mcDeviceAttributeWaveSize:
        *value = prop.warpSize;
        break;
    case mcDeviceAttributeMaxRegistersPerBlock:
        *value = prop.regsPerBlock;
        break;
    case mcDeviceAttributeMaxRegistersPerMultiprocessor:
        *value = prop.regsPerMultiprocessor;
        break;
    case mcDeviceAttributeClockRate:
        *value = prop.clockRate;
        break;
    case mcDeviceAttributeMemoryClockRate:
        *value = prop.memoryClockRate;
        break;
    case mcDeviceAttributeMemoryBusWidth:
        *value = prop.memoryBusWidth;
        break;
    case mcDeviceAttributeMultiProcessorCount:
        *value = prop.multiProcessorCount;
        break;
    case mcDeviceAttributeComputeMode:
        *value = prop.computeMode;
        break;
    case mcDeviceAttributeL2CacheSize:
        *value = prop.l2CacheSize;
        break;
    case mcDeviceAttributeMaxThreadsPerMultiProcessor:
        *value = prop.maxThreadsPerMultiProcessor;
        break;
    case mcDeviceAttributeComputeCapabilityMajor:
        *value = prop.major;
        break;
    case mcDeviceAttributeComputeCapabilityMinor:
        *value = prop.minor;
        break;
    case mcDeviceAttributePciBusId:
        *value = prop.pciBusID;
        break;
    case mcDeviceAttributeConcurrentKernels:
        *value = prop.concurrentKernels;
        break;
    case mcDeviceAttributePciDeviceId:
        *value = prop.pciDeviceID;
        break;
    case mcDeviceAttributeMaxSharedMemoryPerMultiprocessor:
    case mcDeviceAttributeMaxSharedMemoryPerBlockOptin:
        *value = prop.maxSharedMemoryPerMultiProcessor;
        break;
    case mcDeviceAttributeIsMultiGpuBoard:
        *value = prop.isMultiGpuBoard;
        break;
    case mcDeviceAttributeCooperativeLaunch:
        *value = prop.cooperativeLaunch;
        break;
    case mcDeviceAttributeCooperativeMultiDeviceLaunch:
        *value = prop.cooperativeMultiDeviceLaunch;
        break;
    case mcDeviceAttributeIntegrated:
        *value = prop.integrated;
        break;
    case mcDeviceAttributeMaxTexture1DWidth:
        *value = prop.maxTexture1D;
        break;
    case mcDeviceAttributeMaxTexture2DWidth:
        *value = prop.maxTexture2D[0];
        break;
    case mcDeviceAttributeMaxTexture2DHeight:
        *value = prop.maxTexture2D[1];
        break;
    case mcDeviceAttributeMaxTexture3DWidth:
        *value = prop.maxTexture3D[0];
        break;
    case mcDeviceAttributeMaxTexture3DHeight:
        *value = prop.maxTexture3D[1];
        break;
    case mcDeviceAttributeMaxTexture3DDepth:
        *value = prop.maxTexture3D[2];
        break;
    case mcDeviceAttributeHdpMemFlushCntl:
        *(unsigned int **)(value) = prop.hdpMemFlushCntl;
        break;
    case mcDeviceAttributeHdpRegFlushCntl:
        *(unsigned int **)(value) = prop.hdpRegFlushCntl;
        break;
    case mcDeviceAttributeMaxPitch:
        *value = prop.memPitch;
        break;
    case mcDeviceAttributeTextureAlignment:
        *value = prop.textureAlignment;
        break;
    case mcDeviceAttributeTexturePitchAlignment:
        *value = prop.texturePitchAlignment;
        break;
    case mcDeviceAttributeKernelExecTimeout:
        *value = prop.kernelExecTimeoutEnabled;
        break;
    case mcDeviceAttributeCanMapHostMemory:
        *value = prop.canMapHostMemory;
        break;
    case mcDeviceAttributeEccEnabled:
        *value = prop.ECCEnabled;
        break;
    case mcDeviceAttributeCooperativeMultiDeviceUnmatchedFunc:
        *value = prop.cooperativeMultiDeviceUnmatchedFunc;
        break;
    case mcDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim:
        *value = prop.cooperativeMultiDeviceUnmatchedGridDim;
        break;
    case mcDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim:
        *value = prop.cooperativeMultiDeviceUnmatchedBlockDim;
        break;
    case mcDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem:
        *value = prop.cooperativeMultiDeviceUnmatchedSharedMem;
        break;
    case mcDeviceAttributeAsicRevision:
        *value = prop.asicRevision;
        break;
    case mcDeviceAttributeManagedMemory:
        *value = prop.managedMemory;
        break;
    case mcDeviceAttributeDirectManagedMemAccessFromHost:
        *value = prop.directManagedMemAccessFromHost;
        break;
    case mcDeviceAttributeConcurrentManagedAccess:
        *value = prop.concurrentManagedAccess;
        break;
    case mcDeviceAttributePageableMemoryAccess:
        *value = prop.pageableMemoryAccess;
        break;
    case mcDeviceAttributePageableMemoryAccessUsesHostPageTables:
        *value = prop.pageableMemoryAccessUsesHostPageTables;
        break;
    case mcDeviceAttributeCanUseStreamWaitValue:
        *value = 1;
        break;
    case mcDeviceAttributeCanUseStreamMemOps:
        *value = 1;
        break;
    case mcDeviceAttributeCanUseStreamWaitWaitValueNor:
        *value = 1;
        break;
    case mcDeviceAttributeCanFlushRemoteWrites:
        *value = 0;
        break;
    case mcDeviceAttributeMemoryPoolsSupported:
        *value = 1;
        break;
    case mcDeviceAttributeUnifiedAddressing:
        *value = prop.unifiedAddressing;
        break;
    case mcDeviceAttributeMaxAccessPolicyWindowSize:
        *value = prop.accessPolicyMaxWindowSize;
        break;
    case mcDeviceAttributeMaxPersistingL2CacheSize:
        *value = prop.persistingL2CacheMaxSize;
        break;
    case mcDeviceAttributeGpuOverlap:
        *value = prop.deviceOverlap;
        break;
    case mcDeviceAttributeAsyncEngineCount:
        *value = prop.asyncEngineCount;
        break;
    case mcDeviceAttributeMemoryPoolSupportedHandleTypes:
        *value = mcMemHandleTypePosixFileDescriptor;
        break;
    case mcDeviceAttributeTexture1DLinearWidth:
        *value = prop.maxTexture1DLinear;
        break;
    case mcDeviceAttributeTexture2DLinearWidth:
        *value = prop.maxTexture2DLinear[0];
        break;
    case mcDeviceAttributeTexture2DLinearHeight:
        *value = prop.maxTexture2DLinear[1];
        break;
    case mcDeviceAttributeTexture2DLinearPitch:
        *value = prop.maxTexture2DLinear[2];
        break;
    case mcDeviceAttributeHostNativeAtomicSupported:
        *value = 1;
        break;
    case mcDevAttrMaxBlocksPerMultiprocessor:
        *value = prop.maxBlocksPerMultiProcessor;
        break;
    default:
        *value = 0;
        break;
    }

    WCUDA_RETURN(mcSuccess);
}

mcError_t wcudaDeviceGetByPCIBusId(int *device, const char *pciBusId)
{
    WCUDA_RETURN(mcDeviceGetByPCIBusId(device, pciBusId));
}

mcError_t wcudaDeviceGetPCIBusId(char *pciBusId, int len, int device)
{
    WCUDA_RETURN(mcDeviceGetPCIBusId(pciBusId, len, device));
}

mcError_t wcudaDeviceGetSharedMemConfig(enum mcSharedMemConfig *pConfig)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceGetSharedMemConfig(pConfig));
}

mcError_t wcudaDeviceSetSharedMemConfig(enum mcSharedMemConfig config)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceSetSharedMemConfig(config));
}

mcError_t wcudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority)
{
    WCUDA_RETURN(mcDeviceGetStreamPriorityRange(leastPriority, greatestPriority));
}

mcError_t wcudaDeviceSynchronize(void)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceSynchronize());
}

mcError_t wcudaChooseDevice(int *deviceId, const mcDeviceProp_t *prop)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcChooseDevice(deviceId, prop));
}

mcError_t wcudaDeviceGetDefaultMemPool(mcMemPool_t *memPool, int device)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceGetDefaultMemPool(memPool, device));
}

mcError_t wcudaDeviceGetMemPool(mcMemPool_t *memPool, int device)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceGetMemPool(memPool, device));
}

mcError_t wcudaDeviceSetMemPool(int dev, mcMemPool_t pool)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceSetMemPool(dev, pool));
}

mcError_t wcudaDeviceGetCacheConfig(enum mcFuncCache_t *cacheConfig)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceGetCacheConfig(cacheConfig));
}

mcError_t wcudaDeviceSetCacheConfig(enum mcFuncCache_t cacheConfig)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceSetCacheConfig(cacheConfig));
}

mcError_t wcudaDeviceGetLimit(size_t *pValue, enum mcLimit_t limit)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceGetLimit(pValue, limit));
}

mcError_t wcudaDeviceSetLimit(enum mcLimit_t limit, size_t value)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceSetLimit(limit, value));
}

mcError_t wcudaDeviceReset(void)
{
    DEACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceReset());
}

mcError_t wcudaSetValidDevices(int *deviceArray, int len)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcSetValidDevices(deviceArray, len));
}

mcError_t wcudaDeviceFlushGPUDirectRDMAWrites(enum mcFlushGPUDirectRDMAWritesTarget target,
                                              enum mcFlushGPUDirectRDMAWritesScope scope)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceFlushGPUDirectRDMAWrites(target, scope));
}

mcError_t wcudaDeviceGetP2PAttribute(int *value, enum mcDeviceP2PAttr attr, int srcDevice,
                                     int dstDevice)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceGetP2PAttribute(value, attr, srcDevice, dstDevice));
}

mcError_t wcudaDeviceGetNvSciSyncAttributes(void *mcSciSyncAttrList, int device, int flags)
{
    /* temp do not support this api, so just add wrapper here for complier*/
    WCUDA_RETURN(mcSuccess);
}

mcError_t wcudaDeviceGetTexture1DLinearMaxWidth(size_t *maxWidthInElements,
                                                const mcChannelFormatDesc *fmtDesc, int device)
{
    ACTIVE_CONTEXT();

    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

//---------------------------------------------------------------------------//
// Thread Management
//---------------------------------------------------------------------------//
mcError_t wcudaThreadGetLimit(size_t *pValue, enum mcLimit_t limit)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceGetLimit(pValue, limit));
}

mcError_t wcudaThreadSetLimit(enum mcLimit_t limit, size_t value)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceSetLimit(limit, value));
}

mcError_t wcudaThreadGetCacheConfig(enum mcFuncCache_t *cacheConfig)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceGetCacheConfig(cacheConfig));
}

mcError_t wcudaThreadSetCacheConfig(enum mcFuncCache_t cacheConfig)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceSetCacheConfig(cacheConfig));
}

mcError_t wcudaThreadSynchronize(void)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceSynchronize());
}

mcError_t wcudaThreadExit(void)
{
    DEACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceReset());
}

//---------------------------------------------------------------------------//
// Error Handling
//---------------------------------------------------------------------------//
mcError_t wcudaGetLastError(void) { return wcudaGetLastErrorImpl(); }

mcError_t wcudaPeekAtLastError(void) { return wcudaPeekAtLastErrorImpl(); }

const char *wcudaGetErrorString(mcError_t mc_error)
{
    switch (mc_error) {
    case mcSuccess:
        return "no error";
    case mcErrorInvalidValue:
        return "invalid argument";
    case mcErrorMemoryAllocation:
        return "out of memory";
    case mcErrorInitializationError:
        return "initialization error";
    case mcErrorDeinitialized:
        return "driver shutting down";
    case mcErrorProfilerDisabled:
        return "profiler disabled while using external "
               "profiling tool";
    case mcErrorProfilerNotInitialized:
        return "profiler not initialized: call "
               "cudaProfilerInitialize()";
    case mcErrorProfilerAlreadyStarted:
        return "profiler already started";
    case mcErrorProfilerAlreadyStopped:
        return "profiler already stopped";
    case mcErrorInvalidConfiguration:
        return "invalid configuration argument";
    case mcErrorInvalidPitchValue:
        return "invalid pitch argument";
    case mcErrorInvalidSymbol:
        return "invalid device symbol";
    case mcErrorInvalidHostPointer:
        return "invalid host pointer";
    case mcErrorInvalidDevicePointer:
        return "invalid device pointer";
    case mcErrorInvalidTexture:
        return "invalid texture reference";
    case mcErrorInvalidTextureBinding:
        return "texture is not bound to a pointer";
    case mcErrorInvalidChannelDescriptor:
        return "invalid channel descriptor";
    case mcErrorInvalidMemcpyDirection:
        return "invalid copy direction for memcpy";
    case mcErrorAddressOfConstant:
        return "invalid address of constant";
    case mcErrorTextureFetchFailed:
        return "fetch from texture failed";
    case mcErrorTextureNotBound:
        return "cannot fetch from a texture that is not bound";
    case mcErrorSynchronizationError:
        return "incorrect use of __syncthreads()";
    case mcErrorInvalidFilterSetting:
        return "linear filtering not supported for non-float "
               "type";
    case mcErrorInvalidNormSetting:
        return "read as normalized float not supported for "
               "32-bit non float type";
    case mcErrorMixedDeviceExecution:
        return "device emulation mode and device execution "
               "mode cannot be mixed";
    case mcErrorNotYetImplemented:
        return "feature not yet implemented";
    case mcErrorMemoryValueTooLarge:
        return "memory size or pointer value too large to fit "
               "in 32 bit";
    case mcErrorStubLibrary:
        return "CUDA driver is a stub library";
    case mcErrorInsufficientDriver:
        return "CUDA driver version is insufficient for CUDA "
               "runtime version";
    case mcErrorCallRequiresNewerDriver:
        return "API call is not supported in the installed "
               "CUDA driver";
    case mcErrorInvalidSurface:
        return "invalid surface reference";
    case mcErrorDuplicateVariableName:
        return "duplicate global variable looked up by string "
               "name";
    case mcErrorDuplicateTextureName:
        return "duplicate texture looked up by string name";
    case mcErrorDuplicateSurfaceName:
        return "duplicate surface looked up by string name";
    case mcErrorDevicesUnavailable:
        return "all CUDA-capable devices are busy or "
               "unavailable";
    case mcErrorIncompatibleDriverContext:
        return "incompatible driver context";
    case mcErrorMissingConfiguration:
        return "__global__ function call is not configured";
    case mcErrorPriorLaunchFailure:
        return "unspecified launch failure in prior launch";
    case mcErrorLaunchMaxDepthExceeded:
        return "launch would exceed maximum depth of nested "
               "launches";
    case mcErrorLaunchFileScopedTex:
        return "launch failed because kernel uses unsupported, "
               "file-scoped "
               "textures (texture objects are supported)";
    case mcErrorLaunchFileScopedSurf:
        return "launch failed because kernel uses unsupported, "
               "file-scoped "
               "surfaces (surface objects are supported)";
    case mcErrorSyncDepthExceeded:
        return "cudaDeviceSynchronize failed because caller's "
               "grid depth exceeds "
               "cudaLimitDevRuntimeSyncDepth";
    case mcErrorLaunchPendingCountExceeded:
        return "launch failed because launch would exceed "
               "cudaLimitDevRuntimePendingLaunchCount";
    case mcErrorInvalidDeviceFunction:
        return "invalid device function";
    case mcErrorNoDevice:
        return "no CUDA-capable device is detected";
    case mcErrorInvalidDevice:
        return "invalid device ordinal";
    case mcErrorDeviceNotLicensed:
        return "device doesn't have valid Grid license";
    case mcErrorSoftwareValidityNotEstablished:
        return "integrity checks failed";
    case mcErrorStartupFailure:
        return "startup failure in cuda runtime";
    case mcErrorInvalidKernelImage:
        return "device kernel image is invalid";
    case mcErrorDeviceUninitialized:
        return "invalid device context";
    case mcErrorMapBufferObjectFailed:
        return "mapping of buffer object failed";
    case mcErrorUnmapBufferObjectFailed:
        return "unmapping of buffer object failed";
    case mcErrorArrayIsMapped:
        return "array is mapped";
    case mcErrorAlreadyMapped:
        return "resource already mapped";
    case mcErrorNoKernelImageForDevice:
        return "no kernel image is available for execution on "
               "the device";
    case mcErrorAlreadyAcquired:
        return "resource already acquired";
    case mcErrorNotMapped:
        return "resource not mapped";
    case mcErrorNotMappedAsArray:
        return "resource not mapped as array";
    case mcErrorNotMappedAsPointer:
        return "resource not mapped as pointer";
    case mcErrorECCUncorrectable:
        return "uncorrectable ECC error encountered";
    case mcErrorUnsupportedLimit:
        return "limit is not supported on this architecture";
    case mcErrorDeviceAlreadyInUse:
        return "exclusive-thread device already in use by a "
               "different thread";
    case mcErrorPeerAccessUnsupported:
        return "peer access is not supported between these two "
               "devices";
    case mcErrorInvalidKernelFile:
        return "a PTX JIT compilation failed";
    case mcErrorInvalidGraphicsContext:
        return "invalid OpenGL or DirectX context";
    case mcErrorMxlinkUncorrectable:
        return "uncorrectable NVLink error detected during the "
               "execution";
    case mcErrorJitCompilerNotFound:
        return "PTX JIT compiler library not found";
    case mcErrorUnsupportedKernelVersion:
        return "the provided PTX was compiled with an "
               "unsupported toolchain.";
    case mcErrorJitCompilationDisabled:
        return "PTX JIT compilation was disabled";
    case mcErrorUnsupportedExecAffinity:
        return "the provided execution affinity is not "
               "supported";
    case mcErrorInvalidSource:
        return "device kernel image is invalid";
    case mcErrorFileNotFound:
        return "file not found";
    case mcErrorSharedObjectSymbolNotFound:
        return "shared object symbol not found";
    case mcErrorSharedObjectInitFailed:
        return "shared object initialization failed";
    case mcErrorOperatingSystem:
        return "OS call failed or operation not supported on "
               "this OS";
    case mcErrorInvalidResourceHandle:
        return "invalid resource handle";
    case mcErrorIllegalState:
        return "the operation cannot be performed in the "
               "present state";
    case mcErrorSymbolNotFound:
        return "named symbol not found";
    case mcErrorNotReady:
        return "device not ready";
    case mcErrorIllegalAddress:
        return "an illegal memory access was encountered";
    case mcErrorLaunchOutOfResources:
        return "too many resources requested for launch";
    case mcErrorLaunchTimeout:
        return "the launch timed out and was terminated";
    case mcErrorLaunchIncompatibleTexturing:
        return "launch uses incompatible texturing mode";
    case mcErrorPeerAccessAlreadyEnabled:
        return "peer access is already enabled";
    case mcErrorPeerAccessNotEnabled:
        return "peer access has not been enabled";
    case mcErrorSetOnActiveProcess:
        return "cannot set while device is active in this "
               "process";
    case mcErrorContextIsDestroyed:
        return "context is destroyed";
    case mcErrorAssert:
        return "device-side assert triggered";
    case mcErrorTooManyPeers:
        return "peer mapping resources exhausted";
    case mcErrorHostMemoryAlreadyRegistered:
        return "part or all of the requested memory range is "
               "already mapped";
    case mcErrorHostMemoryNotRegistered:
        return "pointer does not correspond to a registered "
               "memory region";
    case mcErrorHardwareStackError:
        return "hardware stack error";
    case mcErrorIllegalInstruction:
        return "an illegal instruction was encountered";
    case mcErrorMisalignedAddress:
        return "misaligned address";
    case mcErrorInvalidAddressSpace:
        return "operation not supported on global/shared "
               "address space";
    case mcErrorInvalidPc:
        return "invalid program counter";
    case mcErrorLaunchFailure:
        return "unspecified launch failure";
    case mcErrorCooperativeLaunchTooLarge:
        return "too many blocks in cooperative launch";
    case mcErrorNotPermitted:
        return "operation not permitted";
    case mcErrorNotSupported:
        return "operation not supported";
    case mcErrorSystemNotReady:
        return "system not yet initialized";
    case mcErrorSystemDriverMismatch:
        return "system has unsupported display driver / cuda "
               "driver combination";
    case mcErrorCompatNotSupportedOnDevice:
        return "forward compatibility was attempted on non "
               "supported HW";
    case mcErrorMpsConnectionFailed:
        return "MPS client failed to connect to the MPS "
               "control daemon or the "
               "MPS server";
    case mcErrorMpsRpcFailure:
        return "the remote procedural call between the MPS "
               "server and the MPS "
               "client failed";
    case mcErrorMpsServerNotReady:
        return "MPS server is not ready to accept new MPS "
               "client requests";
    case mcErrorMpsMaxClientsReached:
        return "the hardware resources required to create MPS "
               "client have been "
               "exhausted";
    case mcErrorMpsMaxConnectionsReached:
        return "the hardware resources required to support "
               "device connections "
               "have been exhausted";
    case mcErrorStreamCaptureUnsupported:
        return "operation not permitted when stream is "
               "capturing";
    case mcErrorStreamCaptureInvalidated:
        return "operation failed due to a previous error "
               "during capture";
    case mcErrorStreamCaptureMerge:
        return "operation would result in a merge of separate "
               "capture sequences";
    case mcErrorStreamCaptureUnmatched:
        return "capture was not ended in the same stream as it "
               "began";
    case mcErrorStreamCaptureUnjoined:
        return "capturing stream has unjoined work";
    case mcErrorStreamCaptureIsolation:
        return "dependency created on uncaptured work in "
               "another stream";
    case mcErrorStreamCaptureImplicit:
        return "operation would make the legacy stream depend "
               "on a capturing "
               "blocking stream";
    case mcErrorCapturedEvent:
        return "operation not permitted on an event last "
               "recorded in a capturing "
               "stream";
    case mcErrorStreamCaptureWrongThread:
        return "attempt to terminate a thread-local capture "
               "sequence from "
               "another thread";
    case mcErrorTimeout:
        return "wait operation timed out";
    case mcErrorGraphExecUpdateFailure:
        return "the graph update was not performed because it "
               "included changes "
               "which violated constraints specific to "
               "instantiated graph update";
    case mcErrorExternalDevice:
        return "an async error has occured in external entity "
               "outside of CUDA";
    case mcErrorUnknown:
        return "unknown error";
    case mcErrorApiFailureBase:
        return "api failure base";
    default:
        return "unrecognized error code";
    }
}

const char *wcudaGetErrorName(mcError_t mc_error)
{
    switch (mc_error) {
    case mcSuccess:
        return "cudaSuccess";
    case mcErrorInvalidValue:
        return "cudaErrorInvalidValue";
    case mcErrorMemoryAllocation:
        return "cudaErrorMemoryAllocation";
    case mcErrorInitializationError:
        return "cudaErrorInitializationError";
    case mcErrorDeinitialized:
        return "cudaErrorCudartUnloading";
    case mcErrorProfilerDisabled:
        return "cudaErrorProfilerDisabled";
    case mcErrorProfilerNotInitialized:
        return "cudaErrorProfilerNotInitialized";
    case mcErrorProfilerAlreadyStarted:
        return "cudaErrorProfilerAlreadyStarted";
    case mcErrorProfilerAlreadyStopped:
        return "cudaErrorProfilerAlreadyStopped";
    case mcErrorInvalidConfiguration:
        return "cudaErrorInvalidConfiguration";
    case mcErrorInvalidPitchValue:
        return "cudaErrorInvalidPitchValue";
    case mcErrorInvalidSymbol:
        return "cudaErrorInvalidSymbol";
    case mcErrorInvalidHostPointer:
        return "cudaErrorInvalidHostPointer";
    case mcErrorInvalidDevicePointer:
        return "cudaErrorInvalidDevicePointer";
    case mcErrorInvalidTexture:
        return "cudaErrorInvalidTexture";
    case mcErrorInvalidTextureBinding:
        return "cudaErrorInvalidTextureBinding";
    case mcErrorInvalidChannelDescriptor:
        return "cudaErrorInvalidChannelDescriptor";
    case mcErrorInvalidMemcpyDirection:
        return "cudaErrorInvalidMemcpyDirection";
    case mcErrorAddressOfConstant:
        return "cudaErrorAddressOfConstant";
    case mcErrorTextureFetchFailed:
        return "cudaErrorTextureFetchFailed";
    case mcErrorTextureNotBound:
        return "cudaErrorTextureNotBound";
    case mcErrorSynchronizationError:
        return "cudaErrorSynchronizationError";
    case mcErrorInvalidFilterSetting:
        return "cudaErrorInvalidFilterSetting";
    case mcErrorInvalidNormSetting:
        return "cudaErrorInvalidNormSetting";
    case mcErrorMixedDeviceExecution:
        return "cudaErrorMixedDeviceExecution";
    case mcErrorNotYetImplemented:
        return "cudaErrorNotYetImplemented";
    case mcErrorMemoryValueTooLarge:
        return "cudaErrorMemoryValueTooLarge";
    case mcErrorStubLibrary:
        return "cudaErrorStubLibrary";
    case mcErrorInsufficientDriver:
        return "cudaErrorInsufficientDriver";
    case mcErrorCallRequiresNewerDriver:
        return "cudaErrorCallRequiresNewerDriver";
    case mcErrorInvalidSurface:
        return "cudaErrorInvalidSurface";
    case mcErrorDuplicateVariableName:
        return "cudaErrorDuplicateVariableName";
    case mcErrorDuplicateTextureName:
        return "cudaErrorDuplicateTextureName";
    case mcErrorDuplicateSurfaceName:
        return "cudaErrorDuplicateSurfaceName";
    case mcErrorDevicesUnavailable:
        return "cudaErrorDevicesUnavailable";
    case mcErrorIncompatibleDriverContext:
        return "cudaErrorIncompatibleDriverContext";
    case mcErrorMissingConfiguration:
        return "cudaErrorMissingConfiguration";
    case mcErrorPriorLaunchFailure:
        return "cudaErrorPriorLaunchFailure";
    case mcErrorLaunchMaxDepthExceeded:
        return "cudaErrorLaunchMaxDepthExceeded";
    case mcErrorLaunchFileScopedTex:
        return "cudaErrorLaunchFileScopedTex";
    case mcErrorLaunchFileScopedSurf:
        return "cudaErrorLaunchFileScopedSurf";
    case mcErrorSyncDepthExceeded:
        return "cudaErrorSyncDepthExceeded";
    case mcErrorLaunchPendingCountExceeded:
        return "cudaErrorLaunchPendingCountExceeded";
    case mcErrorInvalidDeviceFunction:
        return "cudaErrorInvalidDeviceFunction";
    case mcErrorNoDevice:
        return "cudaErrorNoDevice";
    case mcErrorInvalidDevice:
        return "cudaErrorInvalidDevice";
    case mcErrorDeviceNotLicensed:
        return "cudaErrorDeviceNotLicensed";
    case mcErrorSoftwareValidityNotEstablished:
        return "cudaErrorSoftwareValidityNotEstablished";
    case mcErrorStartupFailure:
        return "cudaErrorStartupFailure";
    case mcErrorInvalidKernelImage:
        return "cudaErrorInvalidKernelImage";
    case mcErrorDeviceUninitialized:
        return "cudaErrorDeviceUninitialized";
    case mcErrorMapBufferObjectFailed:
        return "cudaErrorMapBufferObjectFailed";
    case mcErrorUnmapBufferObjectFailed:
        return "cudaErrorUnmapBufferObjectFailed";
    case mcErrorArrayIsMapped:
        return "cudaErrorArrayIsMapped";
    case mcErrorAlreadyMapped:
        return "cudaErrorAlreadyMapped";
    case mcErrorNoKernelImageForDevice:
        return "cudaErrorNoKernelImageForDevice";
    case mcErrorAlreadyAcquired:
        return "cudaErrorAlreadyAcquired";
    case mcErrorNotMapped:
        return "cudaErrorNotMapped";
    case mcErrorNotMappedAsArray:
        return "cudaErrorNotMappedAsArray";
    case mcErrorNotMappedAsPointer:
        return "cudaErrorNotMappedAsPointer";
    case mcErrorECCUncorrectable:
        return "cudaErrorECCUncorrectable";
    case mcErrorUnsupportedLimit:
        return "cudaErrorUnsupportedLimit";
    case mcErrorDeviceAlreadyInUse:
        return "cudaErrorDeviceAlreadyInUse";
    case mcErrorPeerAccessUnsupported:
        return "cudaErrorPeerAccessUnsupported";
    case mcErrorInvalidKernelFile:
        return "cudaErrorInvalidPtx";
    case mcErrorInvalidGraphicsContext:
        return "cudaErrorInvalidGraphicsContext";
    case mcErrorMxlinkUncorrectable:
        return "cudaErrorNvlinkUncorrectable";
    case mcErrorJitCompilerNotFound:
        return "cudaErrorJitCompilerNotFound";
    case mcErrorUnsupportedKernelVersion:
        return "cudaErrorUnsupportedPtxVersion";
    case mcErrorJitCompilationDisabled:
        return "cudaErrorJitCompilationDisabled";
    case mcErrorUnsupportedExecAffinity:
        return "cudaErrorUnsupportedExecAffinity";
    case mcErrorInvalidSource:
        return "cudaErrorInvalidSource";
    case mcErrorFileNotFound:
        return "cudaErrorFileNotFound";
    case mcErrorSharedObjectSymbolNotFound:
        return "cudaErrorSharedObjectSymbolNotFound";
    case mcErrorSharedObjectInitFailed:
        return "cudaErrorSharedObjectInitFailed";
    case mcErrorOperatingSystem:
        return "cudaErrorOperatingSystem";
    case mcErrorInvalidResourceHandle:
        return "cudaErrorInvalidResourceHandle";
    case mcErrorIllegalState:
        return "cudaErrorIllegalState";
    case mcErrorSymbolNotFound:
        return "cudaErrorSymbolNotFound";
    case mcErrorNotReady:
        return "cudaErrorNotReady";
    case mcErrorIllegalAddress:
        return "cudaErrorIllegalAddress";
    case mcErrorLaunchOutOfResources:
        return "cudaErrorLaunchOutOfResources";
    case mcErrorLaunchTimeout:
        return "cudaErrorLaunchTimeout";
    case mcErrorLaunchIncompatibleTexturing:
        return "cudaErrorLaunchIncompatibleTexturing";
    case mcErrorPeerAccessAlreadyEnabled:
        return "cudaErrorPeerAccessAlreadyEnabled";
    case mcErrorPeerAccessNotEnabled:
        return "cudaErrorPeerAccessNotEnabled";
    case mcErrorSetOnActiveProcess:
        return "cudaErrorSetOnActiveProcess";
    case mcErrorContextIsDestroyed:
        return "cudaErrorContextIsDestroyed";
    case mcErrorAssert:
        return "cudaErrorAssert";
    case mcErrorTooManyPeers:
        return "cudaErrorTooManyPeers";
    case mcErrorHostMemoryAlreadyRegistered:
        return "cudaErrorHostMemoryAlreadyRegistered";
    case mcErrorHostMemoryNotRegistered:
        return "cudaErrorHostMemoryNotRegistered";
    case mcErrorHardwareStackError:
        return "cudaErrorHardwareStackError";
    case mcErrorIllegalInstruction:
        return "cudaErrorIllegalInstruction";
    case mcErrorMisalignedAddress:
        return "cudaErrorMisalignedAddress";
    case mcErrorInvalidAddressSpace:
        return "cudaErrorInvalidAddressSpace";
    case mcErrorInvalidPc:
        return "cudaErrorInvalidPc";
    case mcErrorLaunchFailure:
        return "cudaErrorLaunchFailure";
    case mcErrorCooperativeLaunchTooLarge:
        return "cudaErrorCooperativeLaunchTooLarge";
    case mcErrorNotPermitted:
        return "cudaErrorNotPermitted";
    case mcErrorNotSupported:
        return "cudaErrorNotSupported";
    case mcErrorSystemNotReady:
        return "cudaErrorSystemNotReady";
    case mcErrorSystemDriverMismatch:
        return "cudaErrorSystemDriverMismatch";
    case mcErrorCompatNotSupportedOnDevice:
        return "cudaErrorCompatNotSupportedOnDevice";
    case mcErrorMpsConnectionFailed:
        return "cudaErrorMpsConnectionFailed";
    case mcErrorMpsRpcFailure:
        return "cudaErrorMpsRpcFailure";
    case mcErrorMpsServerNotReady:
        return "cudaErrorMpsServerNotReady";
    case mcErrorMpsMaxClientsReached:
        return "cudaErrorMpsMaxClientsReached";
    case mcErrorMpsMaxConnectionsReached:
        return "cudaErrorMpsMaxConnectionsReached";
    case mcErrorStreamCaptureUnsupported:
        return "cudaErrorStreamCaptureUnsupported";
    case mcErrorStreamCaptureInvalidated:
        return "cudaErrorStreamCaptureInvalidated";
    case mcErrorStreamCaptureMerge:
        return "cudaErrorStreamCaptureMerge";
    case mcErrorStreamCaptureUnmatched:
        return "cudaErrorStreamCaptureUnmatched";
    case mcErrorStreamCaptureUnjoined:
        return "cudaErrorStreamCaptureUnjoined";
    case mcErrorStreamCaptureIsolation:
        return "cudaErrorStreamCaptureIsolation";
    case mcErrorStreamCaptureImplicit:
        return "cudaErrorStreamCaptureImplicit";
    case mcErrorCapturedEvent:
        return "cudaErrorCapturedEvent";
    case mcErrorStreamCaptureWrongThread:
        return "cudaErrorStreamCaptureWrongThread";
    case mcErrorTimeout:
        return "cudaErrorTimeout";
    case mcErrorGraphExecUpdateFailure:
        return "cudaErrorGraphExecUpdateFailure";
    case mcErrorExternalDevice:
        return "cudaErrorExternalDevice";
    case mcErrorUnknown:
        return "cudaErrorUnknown";
    case mcErrorApiFailureBase:
        return "cudaErrorApiFailureBase";
    default:
        return "unrecognized error code";
    }
}

//---------------------------------------------------------------------------//
// Stream Management
//---------------------------------------------------------------------------//
mcError_t wcudaStreamCreate(mcStream_t *stream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcStreamCreate(stream));
}

mcError_t wcudaStreamCreateWithFlags(mcStream_t *pStream, unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcStreamCreateWithFlags(pStream, flags));
}

mcError_t wcudaStreamCreateWithPriority(mcStream_t *stream, unsigned int flags, int priority)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcStreamCreateWithPriority(stream, flags, priority));
}

mcError_t wcudaStreamDestroy(mcStream_t stream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcStreamDestroy(stream));
}

mcError_t wcudaStreamGetFlags(mcStream_t hStream, unsigned int *flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcStreamGetFlags(hStream, flags));
}

mcError_t wcudaStreamGetPriority(mcStream_t hStream, int *priority)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcStreamGetPriority(hStream, priority));
}

mcError_t wcudaStreamQuery(mcStream_t stream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcStreamQuery(stream));
}

mcError_t wcudaStreamSynchronize(mcStream_t stream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcStreamSynchronize(stream));
}

mcError_t wcudaStreamWaitEvent(mcStream_t stream, mcEvent_t event, unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcStreamWaitEvent(stream, event, flags));
}

mcError_t wcudaStreamAddCallback(mcStream_t stream, mcStreamCallback_t callback, void *userData,
                                 unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcStreamAddCallback(stream, callback, userData, flags));
}

mcError_t wcudaStreamAttachMemAsync(mcStream_t stream, void *devPtr, size_t length,
                                    unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcStreamAttachMemAsync(stream, devPtr, length, flags));
}

mcError_t wcudaStreamCopyAttributes(mcStream_t dst, mcStream_t src)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcStreamCopyAttributes(dst, src));
}

mcError_t wcudaStreamSetAttribute(mcStream_t hStream, mcStreamAttrID attr,
                                  const mcStreamAttrValue *value)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcStreamSetAttribute(hStream, attr, value));
}
mcError_t wcudaStreamGetAttribute(mcStream_t hStream, mcStreamAttrID attr,
                                  mcStreamAttrValue *value_out)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcStreamGetAttribute(hStream, attr, value_out));
}

mcError_t wcudaStreamBeginCapture(mcStream_t stream, mcStreamCaptureMode mode)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcStreamBeginCapture(stream, mode));
}

mcError_t wcudaStreamEndCapture(mcStream_t stream, mcGraph_t *pGraph)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcStreamEndCapture(stream, pGraph));
}

mcError_t wcudaStreamIsCapturing(mcStream_t stream, mcStreamCaptureStatus *status)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcStreamIsCapturing(stream, status));
}

mcError_t wcudaStreamGetCaptureInfo(mcStream_t stream, mcStreamCaptureStatus *captureStatus_out,
                                    unsigned long long *id_out)
{
    ACTIVE_CONTEXT();
    mcGraph_t *graph_out                   = 0;
    const mcGraphNode_t **dependencies_out = 0;
    size_t *numDependencies_out            = 0;
    WCUDA_RETURN(mcStreamGetCaptureInfo(stream, captureStatus_out, id_out, graph_out,
                                        dependencies_out, numDependencies_out));
}

mcError_t wcudaStreamGetCaptureInfo_v2(mcStream_t stream, mcStreamCaptureStatus *captureStatus_out,
                                       unsigned long long *id_out, mcGraph_t *graph_out,
                                       const mcGraphNode_t **dependencies_out,
                                       size_t *numDependencies_out)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcStreamGetCaptureInfo(stream, captureStatus_out, id_out, graph_out,
                                        dependencies_out, numDependencies_out));
}

mcError_t wcudaStreamUpdateCaptureDependencies(mcStream_t stream, mcGraphNode_t *dependencies,
                                               size_t numDependencies, unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcStreamUpdateCaptureDependencies(stream, dependencies, numDependencies, flags));
}

mcError_t wcudaThreadExchangeStreamCaptureMode(mcStreamCaptureMode *mode)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcThreadExchangeStreamCaptureMode(mode));
}

mcError_t wcudaCtxResetPersistingL2Cache(void)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcCtxResetPersistingL2Cache());
}

//---------------------------------------------------------------------------//
// Event Management
//---------------------------------------------------------------------------//
mcError_t wcudaIpcGetEventHandle(mcIpcEventHandle_t *ipc_hdl, mcEvent_t event)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcIpcGetEventHandle(ipc_hdl, event));
}

mcError_t wcudaIpcOpenEventHandle(mcEvent_t *event, mcIpcEventHandle_t ipc_hdl)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcIpcOpenEventHandle(event, ipc_hdl));
}

mcError_t wcudaEventCreate(mcEvent_t *event)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcEventCreate(event));
}

mcError_t wcudaEventCreate(mcEvent_t *event, unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcEventCreate(event, flags));
}

mcError_t wcudaEventCreateWithFlags(mcEvent_t *event, unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcEventCreateWithFlags(event, flags));
}

mcError_t wcudaEventDestroy(mcEvent_t event)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcEventDestroy(event));
}

mcError_t wcudaEventElapsedTime(float *ms, mcEvent_t start, mcEvent_t stop)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcEventElapsedTime(ms, start, stop));
}

mcError_t wcudaEventQuery(mcEvent_t event)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcEventQuery(event));
}

mcError_t wcudaEventRecord(mcEvent_t event, mcStream_t stream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcEventRecord(event, stream));
}

mcError_t wcudaEventRecordWithFlags(mcEvent_t event, mcStream_t stream, unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcEventRecordWithFlags(event, stream, flags));
}

mcError_t wcudaEventSynchronize(mcEvent_t event)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcEventSynchronize(event));
}
//---------------------------------------------------------------------------//
// Execution Control
//---------------------------------------------------------------------------//
mcError_t wcudaFuncGetAttributes(struct mcFuncAttributes *attr, const void *func)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcFuncGetAttributes(attr, func));
}

mcError_t wcudaLaunchKernel(const void *hostFunction, dim3 gridDim, dim3 blockDim, void **args,
                            size_t sharedMemBytes, mcStream_t stream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream));
}

mcError_t wcudaFuncSetAttribute(const void *func, mcFuncAttribute attr, int value)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcFuncSetAttribute(func, attr, value));
}

mcError_t wcudaFuncSetCacheConfig(const void *func, enum mcFuncCache_t cacheConfig)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcFuncSetCacheConfig(func, cacheConfig));
}

mcError_t wcudaFuncSetSharedMemConfig(const void *func, enum mcSharedMemConfig config)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcFuncSetSharedMemConfig(func, config));
}

mcError_t wcudaLaunchCooperativeKernel(const void *f, dim3 gridDim, dim3 blockDim,
                                       void **kernelParams, size_t sharedMemBytes,
                                       mcStream_t stream)
{
    ACTIVE_CONTEXT();
    if (sharedMemBytes > (size_t)std::numeric_limits<unsigned int>::max()) {
        WCUDA_RETURN(mcErrorInvalidValue);
    } else {
        WCUDA_RETURN(mcLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams,
                                               (unsigned int)sharedMemBytes, stream));
    }
}

mcError_t wcudaLaunchHostFunc(mcStream_t stream, mcHostFn_t fn, void *userData)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcLaunchHostFunc(stream, fn, userData));
}

mcError_t wcudaLaunchCooperativeKernelMultiDevice(mcLaunchParams *launchParamsList,
                                                  unsigned int numDevices, unsigned flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags));
}
//---------------------------------------------------------------------------//
// Occupancy
//---------------------------------------------------------------------------//

mcError_t wcudaOccupancyAvailableDynamicSMemPerBlock(size_t *dynamicSmemSize, const void *f,
                                                     int numBlocks, int blockSize)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcOccupancyAvailableDynamicSMemPerBlock(dynamicSmemSize, f, numBlocks, blockSize));
}

mcError_t wcudaOccupancyMaxPotentialBlockSize(int *gridSize, int *blockSize, const void *f,
                                              size_t dynSharedMemPerBlk, int blockSizeLimit)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, dynSharedMemPerBlk,
                                                  blockSizeLimit));
}

mcError_t wcudaOccupancyMaxPotentialBlockSizeWithFlags(int *gridSize, int *blockSize, const void *f,
                                                       size_t dynSharedMemPerBlk,
                                                       int blockSizeLimit, unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcOccupancyMaxPotentialBlockSizeWithFlags(
        gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit, flags));
}

mcError_t wcudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *f,
                                                         int blockSize, size_t dynamicSMemSize)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(
        mcOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f, blockSize, dynamicSMemSize));
}

mcError_t wcudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *f,
                                                                  int blockSize,
                                                                  size_t dynamicSMemSize,
                                                                  unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, f, blockSize,
                                                                      dynamicSMemSize, flags));
}

//---------------------------------------------------------------------------//
// Memory Management
//---------------------------------------------------------------------------//
mcError_t wcudaMalloc(void **ptr, size_t sizeBytes)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMalloc_v0(ptr, sizeBytes));
}

mcError_t wcudaMallocHost(void **ptr, size_t size)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMallocHost_v0(ptr, size));
}

mcError_t wcudaMallocHost(void **ptr, size_t size, unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMallocHost(ptr, size, flags));
}

mcError_t wcudaMallocArray(mcArray_t *array, const mcChannelFormatDesc *desc, size_t width,
                           size_t height, unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMallocArray_v0(array, desc, width, height, flags));
}

mcError_t wcudaMallocMipmappedArray(mcMipmappedArray_t *mipmappedArray,
                                    const mcChannelFormatDesc *desc, struct mcExtent extent,
                                    unsigned int numLevels, unsigned int flags)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaMemcpy(void *dst, const void *src, size_t count, mcMemcpyKind kind)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemcpy(dst, src, count, kind));
}

mcError_t wcudaMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width,
                             size_t height, enum _mcMemcpyKind kind, mcStream_t stream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, stream));
}

mcError_t wcudaMemcpyFromArray(void *dst, mcArray_const_t src, size_t wOffset, size_t hOffset,
                               size_t count, mcMemcpyKind kind)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotSupported);
}
mcError_t wcudaMemcpyArrayToArray(mcArray_t dst, size_t wOffsetDst, size_t hOffsetDst,
                                  mcArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc,
                                  size_t count, mcMemcpyKind kind)
{
    ACTIVE_CONTEXT();

    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotSupported);
}

mcError_t wcudaMemcpyToArrayAsync(mcArray_t dst, size_t wOffset, size_t hOffset, const void *src,
                                  size_t count, mcMemcpyKind kind, mcStream_t stream)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotSupported);
}
mcError_t wcudaMemcpyFromArrayAsync(void *dst, mcArray_const_t src, size_t wOffset, size_t hOffset,
                                    size_t count, mcMemcpyKind kind, mcStream_t stream)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotSupported);
}

mcError_t wcudaMemcpy2DToArrayAsync(mcArray_t dst, size_t wOffset, size_t hOffset, const void *src,
                                    size_t spitch, size_t width, size_t height, mcMemcpyKind kind,
                                    mcStream_t stream)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaMemcpy2DFromArrayAsync(void *dst, size_t dpitch, mcArray_const_t src, size_t wOffset,
                                      size_t hOffset, size_t width, size_t height,
                                      mcMemcpyKind kind, mcStream_t stream)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaMemcpyAsync(void *dst, const void *src, size_t count, enum _mcMemcpyKind kind,
                           mcStream_t stream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemcpyAsync(dst, src, count, kind, stream));
}

mcError_t wcudaFree(void *ptr)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcFree(ptr));
}

mcError_t wcudaFreeHost(void *ptr)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcFreeHost(ptr));
}

mcError_t wcudaMemcpyToArray(mcArray_t dst, size_t wOffset, size_t hOffset, const void *src,
                             size_t count, enum _mcMemcpyKind kind)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemcpyToArray(dst, wOffset, hOffset, src, count, kind));
}

mcError_t wcudaMemcpy2DToArray(mcArray_t dst, size_t wOffset, size_t hOffset, const void *src,
                               size_t spitch, size_t width, size_t height, enum _mcMemcpyKind kind)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width, height, kind));
}

mcError_t wcudaMemcpy2DFromArray(void *dst, size_t dpitch, mcArray_const_t src, size_t wOffset,
                                 size_t hOffset, size_t width, size_t height, mcMemcpyKind kind)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t cudaMemcpy2DArrayToArray(mcArray_t dst, size_t wOffsetDst, size_t hOffsetDst,
                                   mcArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc,
                                   size_t width, size_t height, mcMemcpyKind kind)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaFreeArray(mcArray_t array)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcFreeArray(array));
}

mcError_t wcudaFreeMipmappedArray(mcMipmappedArray_t mipmappedArray)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaHostGetDevicePointer(void **devPtr, void *hostPtr, unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcHostGetDevicePointer(devPtr, hostPtr, flags));
}

mcError_t wcudaHostAlloc(void **pHost, size_t size, unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcHostAlloc_v0(pHost, size, flags));
}

mcError_t wcudaHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcHostRegister(hostPtr, sizeBytes, flags));
}

mcError_t wcudaHostUnregister(void *hostPtr)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcHostUnregister(hostPtr));
}

mcError_t wcudaMemGetInfo(size_t *free, size_t *total)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemGetInfo(free, total));
}

mcError_t wcudaArrayGetInfo(mcChannelFormatDesc *desc, struct mcExtent *extent, unsigned int *flags,
                            mcArray_t array)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaArrayGetPlane(mcArray_t *pPlaneArray, mcArray_t hArray, unsigned int planeIdx)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaArrayGetMemoryRequirements(mcArrayMemoryRequirements *memoryRequirements,
                                          mcArray_t array, int device)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaMipmappedArrayGetMemoryRequirements(mcArrayMemoryRequirements *memoryRequirements,
                                                   mcMipmappedArray_t mipmap, int device)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaArrayGetSparseProperties(mcArraySparseProperties *sparseProperties, mcArray_t array)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaMipmappedArrayGetSparseProperties(mcArraySparseProperties *sparseProperties,
                                                 mcMipmappedArray_t mipmap)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaMemset(void *dst, int value, size_t sizeBytes)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemset(dst, value, sizeBytes));
}

mcError_t wcudaMemsetAsync(void *dst, int value, size_t sizeBytes, mcStream_t stream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemsetAsync(dst, value, sizeBytes, stream));
}

mcError_t wcudaGetSymbolAddress(void **devPtr, const void *symbol)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGetSymbolAddress(devPtr, symbol));
}

mcError_t wcudaGetSymbolSize(size_t *size, const void *symbol)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGetSymbolSize(size, symbol));
}

mcError_t wcudaHostGetFlags(unsigned int *flagsPtr, void *hostPtr)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcHostGetFlags(flagsPtr, hostPtr));
}

mcError_t wcudaMallocPitch(void **ptr, size_t *pitch, size_t width, size_t height)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMallocPitch(ptr, pitch, width, height));
}

mcError_t wcudaMalloc3D(struct mcPitchedPtr *pitchedDevPtr, struct mcExtent extent)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMalloc3D(pitchedDevPtr, extent));
}

mcError_t wcudaMalloc3DArray(mcArray_t *array, const mcChannelFormatDesc *desc,
                             struct mcExtent extent, unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMalloc3DArray(array, desc, extent, flags));
}

mcError_t wcudaMemcpy3D(const struct mcMemcpy3DParms *p)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemcpy3D(p));
}

mcError_t wcudaMemcpy3DAsync(const struct mcMemcpy3DParms *p, mcStream_t stream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemcpy3DAsync(p, stream));
}

mcError_t wcudaMemcpy3DPeer(const mcMemcpy3DPeerParms *p)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaMemcpy3DPeerAsync(const mcMemcpy3DPeerParms *p, mcStream_t stream)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaMemset3D(struct mcPitchedPtr pitchedDevPtr, int value, struct mcExtent extent)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemset3D(pitchedDevPtr, value, extent));
}

mcError_t wcudaMemset3DAsync(struct mcPitchedPtr pitchedDevPtr, int value, struct mcExtent extent,
                             mcStream_t stream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemset3DAsync(pitchedDevPtr, value, extent, stream));
}

mcError_t wcudaMemcpy2D(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width,
                        size_t height, enum _mcMemcpyKind kind)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemcpy2D(dst, dpitch, src, spitch, width, height, kind));
}

mcError_t wcudaMemcpyFromSymbol(void *dst, const void *symbol, size_t sizeBytes, size_t offset,
                                enum _mcMemcpyKind kind)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemcpyFromSymbol(dst, symbol, sizeBytes, offset, kind));
}

mcError_t wcudaMemcpyFromSymbolAsync(void *dst, const void *symbol, size_t sizeBytes, size_t offset,
                                     enum _mcMemcpyKind kind, mcStream_t stream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemcpyFromSymbolAsync(dst, symbol, sizeBytes, offset, kind, stream));
}

mcError_t wcudaMemcpyPeer(void *dst, int dstDevice, const void *src, int srcDevice,
                          size_t sizeBytes)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemcpyPeer(dst, dstDevice, src, srcDevice, sizeBytes));
}

mcError_t wcudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice,
                               size_t sizeBytes, mcStream_t stream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemcpyPeerAsync(dst, dstDevice, src, srcDevice, sizeBytes, stream));
}

mcError_t wcudaMemcpyToSymbol(const void *symbol, const void *src, size_t sizeBytes, size_t offset,
                              enum _mcMemcpyKind kind)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemcpyToSymbol(symbol, src, sizeBytes, offset, kind));
}

mcError_t wcudaMemcpyToSymbolAsync(const void *symbol, const void *src, size_t sizeBytes,
                                   size_t offset, enum _mcMemcpyKind kind, mcStream_t stream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemcpyToSymbolAsync(symbol, src, sizeBytes, offset, kind, stream));
}

mcError_t wcudaMemset2D(void *dst, size_t pitch, int value, size_t width, size_t height)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemset2D(dst, pitch, value, width, height));
}

mcError_t wcudaMemset2DAsync(void *dst, size_t pitch, int value, size_t width, size_t height,
                             mcStream_t stream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemset2DAsync(dst, pitch, value, width, height, stream));
}

mcExtent make_wcudaExtent(size_t w, size_t h, size_t d) { return make_mcExtent(w, h, d); }

struct mcPitchedPtr make_wcudaPitchedPtr(void *d, size_t p, size_t xsz, size_t ysz)
{
    return make_mcPitchedPtr(d, p, xsz, ysz);
}

mcPos make_wcudaPos(size_t x, size_t y, size_t z) { return make_mcPos(x, y, z); }

mcError_t wcudaIpcGetMemHandle(mcIpcMemHandle_t *handle, void *devPtr)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcIpcGetMemHandle(handle, devPtr));
}

mcError_t wcudaIpcOpenMemHandle(void **devPtr, mcIpcMemHandle_t handle, unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcIpcOpenMemHandle(devPtr, handle, flags));
}

mcError_t wcudaIpcCloseMemHandle(void *devPtr)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcIpcCloseMemHandle(devPtr));
}

//---------------------------------------------------------------------------//
// Stream Ordered Memory Allocator
//---------------------------------------------------------------------------//
mcError_t wcudaMemPoolCreate(mcMemPool_t *memPool, const struct mcMemPoolProps *poolProps)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemPoolCreate(memPool, poolProps));
}

mcError_t wcudaMemPoolDestroy(mcMemPool_t memPool)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemPoolDestroy(memPool));
}

mcError_t wcudaMemPoolExportPointer(struct mcMemPoolPtrExportData *exportData, void *ptr)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemPoolExportPointer(exportData, ptr));
}

mcError_t wcudaMemPoolExportToShareableHandle(void *sharedHandle, mcMemPool_t memPool,
                                              enum mcMemAllocationHandleType handleType,
                                              unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemPoolExportToShareableHandle(sharedHandle, memPool, handleType, flags));
}

mcError_t wcudaMemPoolGetAccess(enum mcMemAccessFlags *flags, mcMemPool_t memPool,
                                struct mcMemLocation *location)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemPoolGetAccess(flags, memPool, location));
}

mcError_t wcudaMemPoolGetAttribute(mcMemPool_t memPool, enum mcMemPoolAttr attr, void *value)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemPoolGetAttribute(memPool, attr, value));
}

mcError_t wcudaMemPoolImportFromShareableHandle(mcMemPool_t *memPool, void *sharedHandle,
                                                enum mcMemAllocationHandleType handleType,
                                                unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemPoolImportFromShareableHandle(memPool, sharedHandle, handleType, flags));
}

mcError_t wcudaMemPoolImportPointer(void **ptr, mcMemPool_t memPool,
                                    struct mcMemPoolPtrExportData *exportData)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemPoolImportPointer(ptr, memPool, exportData));
}

mcError_t wcudaMemPoolSetAccess(mcMemPool_t memPool, const struct mcMemAccessDesc *descList,
                                size_t count)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemPoolSetAccess(memPool, descList, count));
}

mcError_t wcudaMemPoolSetAttribute(mcMemPool_t memPool, enum mcMemPoolAttr attr, void *value)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemPoolSetAttribute(memPool, attr, value));
}

mcError_t wcudaMemPoolTrimTo(mcMemPool_t memPool, size_t minBytesToKeep)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemPoolTrimTo(memPool, minBytesToKeep));
}

//---------------------------------------------------------------------------//
// Unified Addressing
//---------------------------------------------------------------------------//
mcError_t wcudaMallocManaged(void **dev_ptr, size_t size, unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMallocManaged(dev_ptr, size, flags));
}

mcError_t wcudaMemAdvise(const void *dev_ptr, size_t count, mcMemoryAdvise_t advice, int device)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemAdvise(dev_ptr, count, advice, device));
}

mcError_t wcudaMemPrefetchAsync(const void *dev_ptr, size_t count, int device, mcStream_t stream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemPrefetchAsync(dev_ptr, count, device, stream));
}

mcError_t wcudaMemRangeGetAttribute(void *data, size_t data_size, mcMemRangeAttribute_t attribute,
                                    const void *dev_ptr, size_t count)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMemRangeGetAttribute(data, data_size, attribute, dev_ptr, count));
}

mcError_t wcudaMemRangeGetAttributes(void **data, size_t *data_sizes,
                                     mcMemRangeAttribute_t *attributes, size_t num_attributes,
                                     const void *dev_ptr, size_t count)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(
        mcMemRangeGetAttributes(data, data_sizes, attributes, num_attributes, dev_ptr, count));
}

mcError_t wcudaPointerGetAttributes(struct _mcPointerAttribute_t *attributes, const void *ptr)
{
    ACTIVE_CONTEXT();
    mcError_t ret = mcPointerGetAttributes(attributes, ptr);
    if (ret == mcSuccess) {
        if (attributes->memoryType == mcMemoryTypeUnified) {
            attributes->memoryType = mcMemoryTypeManaged;
        }
        if (attributes->type == mcMemoryTypeUnified) {
            attributes->type = mcMemoryTypeManaged;
        }
    }
    WCUDA_RETURN(ret);
}

//---------------------------------------------------------------------------//
// Peer Device Memory Access
//---------------------------------------------------------------------------//
mcError_t wcudaDeviceCanAccessPeer(int *canAccessPeer, int deviceId, int peerDeviceId)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceCanAccessPeer(canAccessPeer, deviceId, peerDeviceId));
}

mcError_t wcudaDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceEnablePeerAccess(peerDeviceId, flags));
}

mcError_t wcudaDeviceDisablePeerAccess(int peerDeviceId)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceDisablePeerAccess(peerDeviceId));
}

//---------------------------------------------------------------------------//
// Version Version Management
//---------------------------------------------------------------------------//
mcError_t wcudaDriverGetVersion(int *driverVersion)
{
    if (driverVersion == nullptr) {
        WCUDA_RETURN(mcErrorInvalidValue);
    }
    if (__wcuda_version_internal__ < 1000) {
        WCUDA_RETURN(mcErrorNotSupported);
    } else {
        *driverVersion = __wcuda_version_internal__;
    }
    WCUDA_RETURN(mcSuccess);
}

mcError_t wcudaRuntimeGetVersion(int *runtimeVersion)
{
    if (runtimeVersion == nullptr) {
        WCUDA_RETURN(mcErrorInvalidValue);
    }
    if (__wcuda_version_internal__ < 1000) {
        WCUDA_RETURN(mcErrorNotSupported);
    } else {
        *runtimeVersion = __wcuda_version_internal__;
    }
    WCUDA_RETURN(mcSuccess);
}

/**
 * @} Version
 */

//---------------------------------------------------------------------------//
// Interactions with the MACA Driver API
//---------------------------------------------------------------------------//
mcError_t wcudaGetFuncBySymbol(mcFunction_t *functionPtr, const void *symbolPtr)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGetFuncBySymbol(functionPtr, symbolPtr));
}

//---------------------------------------------------------------------------//
//  Driver Entry Point Access
//---------------------------------------------------------------------------//
extern std::map<std::string, std::map<unsigned int, void *>> cuApiWrapperMap;
mcError_t wcudaGetDriverEntryPoint(const char *symbol, void **funcPtr, unsigned long long flags)
{
    WCUDA_RETURN(mcGetDriverEntryPoint_v0(cuApiWrapperMap, symbol, funcPtr, flags));
}

//---------------------------------------------------------------------------//
// Profiler Control
//---------------------------------------------------------------------------//

mcError_t wcudaProfilerInitialize(const char *configFile, const char *outputFile,
                                  mcOutputMode_t outputMode)
{
    WCUDA_RETURN(mcProfilerInitialize(configFile, outputFile, outputMode));
}

mcError_t wcudaProfilerStart(void) { WCUDA_RETURN(mcProfilerStart()); }

mcError_t wcudaProfilerStop(void) { WCUDA_RETURN(mcProfilerStop()); }

//---------------------------------------------------------------------------//
// Stream Ordered Memory Allocator
//---------------------------------------------------------------------------//
mcError_t wcudaMallocAsync(void **devPtr, size_t size, mcStream_t hStream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMallocAsync(devPtr, size, hStream));
}

mcError_t wcudaMallocAsync(void **ptr, size_t size, mcMemPool_t memPool, mcStream_t stream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMallocFromPoolAsync(ptr, size, memPool, stream));
}

mcError_t wcudaMallocFromPoolAsync(void **ptr, size_t size, mcMemPool_t memPool, mcStream_t stream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcMallocFromPoolAsync(ptr, size, memPool, stream));
}

mcError_t wcudaFreeAsync(void *devPtr, mcStream_t hStream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcFreeAsync(devPtr, hStream));
}

//---------------------------------------------------------------------------//
// External Resource Interoperability
//---------------------------------------------------------------------------//
mcError_t wcudaImportExternalSemaphore(mcExternalSemaphore_t *extSem_out,
                                       const mcExternalSemaphoreHandleDesc *semHandleDesc)
{
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaDestroyExternalSemaphore(mcExternalSemaphore_t extSem)
{
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaSignalExternalSemaphoresAsync(const mcExternalSemaphore_t *extSemArray,
                                             const mcExternalSemaphoreSignalParams *paramsArray,
                                             unsigned int numExtSems, mcStream_t stream)
{
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaWaitExternalSemaphoresAsync(const mcExternalSemaphore_t *extSemArray,
                                           const mcExternalSemaphoreWaitParams *paramsArray,
                                           unsigned int numExtSems, mcStream_t stream)
{
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaImportExternalMemory(mcExternalMemory_t *extMem_out,
                                    const mcExternalMemoryHandleDesc *memHandleDesc)
{
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaDestroyExternalMemory(mcExternalMemory_t extMem)
{
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaExternalMemoryGetMappedBuffer(void **devPtr, mcExternalMemory_t extMem,
                                             const mcExternalMemoryBufferDesc *bufferDesc)
{
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t
wcudaExternalMemoryGetMappedMipmappedArray(mcMipmappedArray_t *mipmap, mcExternalMemory_t extMem,
                                           const mcExternalMemoryMipmappedArrayDesc *mipmapDesc)
{
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

//---------------------------------------------------------------------------//
// graph
//---------------------------------------------------------------------------//

mcError_t wcudaDeviceGetGraphMemAttribute(int device, mcGraphMemAttributeType attr, void *value)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceGetGraphMemAttribute(device, attr, value));
}

mcError_t wcudaDeviceGraphMemTrim(int device)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceGraphMemTrim(device));
}

mcError_t wcudaDeviceSetGraphMemAttribute(int device, mcGraphMemAttributeType attr, void *value)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcDeviceSetGraphMemAttribute(device, attr, value));
}

mcError_t wcudaGraphAddChildGraphNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                      const mcGraphNode_t *pDependencies, size_t numDependencies,
                                      mcGraph_t childGraph)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(
        mcGraphAddChildGraphNode(pGraphNode, graph, pDependencies, numDependencies, childGraph));
}

mcError_t wcudaGraphAddDependencies(mcGraph_t graph, const mcGraphNode_t *from,
                                    const mcGraphNode_t *to, size_t numDependencies)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphAddDependencies(graph, from, to, numDependencies));
}

mcError_t wcudaGraphAddEmptyNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                 const mcGraphNode_t *pDependencies, size_t numDependencies)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphAddEmptyNode(pGraphNode, graph, pDependencies, numDependencies));
}

mcError_t wcudaGraphAddEventRecordNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                       const mcGraphNode_t *pDependencies, size_t numDependencies,
                                       mcEvent_t event)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(
        mcGraphAddEventRecordNode(pGraphNode, graph, pDependencies, numDependencies, event));
}

mcError_t wcudaGraphAddEventWaitNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                     const mcGraphNode_t *pDependencies, size_t numDependencies,
                                     mcEvent_t event)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphAddEventWaitNode(pGraphNode, graph, pDependencies, numDependencies, event));
}

mcError_t wcudaGraphAddExternalSemaphoresSignalNode(
    mcGraphNode_t *pGraphNode, mcGraph_t graph, const mcGraphNode_t *pDependencies,
    size_t numDependencies, const mcExternalSemaphoreSignalNodeParams *nodeParams)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t
wcudaGraphAddExternalSemaphoresWaitNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                        const mcGraphNode_t *pDependencies, size_t numDependencies,
                                        const mcExternalSemaphoreWaitNodeParams *nodeParams)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaGraphAddHostNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                const mcGraphNode_t *pDependencies, size_t numDependencies,
                                const mcHostNodeParams *pNodeParams)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(
        mcGraphAddHostNode(pGraphNode, graph, pDependencies, numDependencies, pNodeParams));
}

mcError_t wcudaGraphAddKernelNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                  const mcGraphNode_t *pDependencies, size_t numDependencies,
                                  const mcKernelNodeParams *pNodeParams)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(
        mcGraphAddKernelNode(pGraphNode, graph, pDependencies, numDependencies, pNodeParams));
}

mcError_t wcudaGraphAddMemAllocNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                    const mcGraphNode_t *pDependencies, size_t numDependencies,
                                    struct mcMemAllocNodeParams *nodeParams)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(
        mcGraphAddMemAllocNode(pGraphNode, graph, pDependencies, numDependencies, nodeParams));
}

mcError_t wcudaGraphAddMemFreeNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                   const mcGraphNode_t *pDependencies, size_t numDependencies,
                                   void *dptr)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphAddMemFreeNode(pGraphNode, graph, pDependencies, numDependencies, dptr));
}

mcError_t wcudaGraphAddMemcpyNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                  const mcGraphNode_t *pDependencies, size_t numDependencies,
                                  const struct mcMemcpy3DParms *pCopyParams)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(
        mcGraphAddMemcpyNode(pGraphNode, graph, pDependencies, numDependencies, pCopyParams));
}

mcError_t wcudaGraphAddMemcpyNode1D(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                    const mcGraphNode_t *pDependencies, size_t numDependencies,
                                    void *dst, const void *src, size_t count, mcMemcpyKind kind)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphAddMemcpyNode1D(pGraphNode, graph, pDependencies, numDependencies, dst, src,
                                        count, kind));
}

mcError_t wcudaGraphAddMemcpyNodeFromSymbol(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                            const mcGraphNode_t *pDependencies,
                                            size_t numDependencies, void *dst, const void *symbol,
                                            size_t count, size_t offset, mcMemcpyKind kind)
{
    ACTIVE_CONTEXT();
    return mcGraphAddMemcpyNodeFromSymbol(pGraphNode, graph, pDependencies, numDependencies, dst,
                                          symbol, count, offset, kind);
}

mcError_t wcudaGraphAddMemcpyNodeToSymbol(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                          const mcGraphNode_t *pDependencies,
                                          size_t numDependencies, const void *symbol,
                                          const void *src, size_t count, size_t offset,
                                          mcMemcpyKind kind)
{
    ACTIVE_CONTEXT();
    return mcGraphAddMemcpyNodeToSymbol(pGraphNode, graph, pDependencies, numDependencies, symbol,
                                        src, count, offset, kind);
}

mcError_t wcudaGraphAddMemsetNode(mcGraphNode_t *pGraphNode, mcGraph_t graph,
                                  const mcGraphNode_t *pDependencies, size_t numDependencies,
                                  const mcMemsetParams *pMemsetParams)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(
        mcGraphAddMemsetNode(pGraphNode, graph, pDependencies, numDependencies, pMemsetParams));
}

mcError_t wcudaGraphChildGraphNodeGetGraph(mcGraphNode_t node, mcGraph_t *pGraph)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphChildGraphNodeGetGraph(node, pGraph));
}

mcError_t wcudaGraphClone(mcGraph_t *pGraphClone, mcGraph_t originalGraph)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphClone(pGraphClone, originalGraph));
}

mcError_t wcudaGraphCreate(mcGraph_t *pGraph, unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphCreate(pGraph, flags));
}

mcError_t wcudaGraphDebugDotPrint(mcGraph_t graph, const char *path, unsigned int flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphDebugDotPrint(graph, path, flags));
}

mcError_t wcudaGraphDestroy(mcGraph_t graph)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphDestroy(graph));
}

mcError_t wcudaGraphDestroyNode(mcGraphNode_t node)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphDestroyNode(node));
}

mcError_t wcudaGraphEventRecordNodeGetEvent(mcGraphNode_t node, mcEvent_t *event_out)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphEventRecordNodeGetEvent(node, event_out));
}

mcError_t wcudaGraphEventRecordNodeSetEvent(mcGraphNode_t node, mcEvent_t event)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphEventRecordNodeSetEvent(node, event));
}

mcError_t wcudaGraphEventWaitNodeGetEvent(mcGraphNode_t node, mcEvent_t *event_out)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphEventWaitNodeGetEvent(node, event_out));
}

mcError_t wcudaGraphEventWaitNodeSetEvent(mcGraphNode_t node, mcEvent_t event)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphEventWaitNodeSetEvent(node, event));
}

mcError_t wcudaGraphExecChildGraphNodeSetParams(mcGraphExec_t hGraphExec, mcGraphNode_t node,
                                                mcGraph_t childGraph)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphExecChildGraphNodeSetParams(hGraphExec, node, childGraph));
}

mcError_t wcudaGraphExecDestroy(mcGraphExec_t graphExec)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphExecDestroy(graphExec));
}

mcError_t wcudaGraphExecEventRecordNodeSetEvent(mcGraphExec_t hGraphExec, mcGraphNode_t hNode,
                                                mcEvent_t event)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphExecEventRecordNodeSetEvent(hGraphExec, hNode, event));
}

mcError_t wcudaGraphExecEventWaitNodeSetEvent(mcGraphExec_t hGraphExec, mcGraphNode_t hNode,
                                              mcEvent_t event)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphExecEventWaitNodeSetEvent(hGraphExec, hNode, event));
}

mcError_t wcudaGraphExecExternalSemaphoresSignalNodeSetParams(
    mcGraphExec_t hGraphExec, mcGraphNode_t hNode,
    const mcExternalSemaphoreSignalNodeParams *nodeParams)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaGraphExecExternalSemaphoresWaitNodeSetParams(
    mcGraphExec_t hGraphExec, mcGraphNode_t hNode,
    const mcExternalSemaphoreWaitNodeParams *nodeParams)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaGraphExecHostNodeSetParams(mcGraphExec_t hGraphExec, mcGraphNode_t node,
                                          const mcHostNodeParams *pNodeParams)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphExecHostNodeSetParams(hGraphExec, node, pNodeParams));
}

mcError_t wcudaGraphExecKernelNodeSetParams(mcGraphExec_t hGraphExec, mcGraphNode_t node,
                                            const mcKernelNodeParams *pNodeParams)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphExecKernelNodeSetParams(hGraphExec, node, pNodeParams));
}

mcError_t wcudaGraphExecMemcpyNodeSetParams(mcGraphExec_t hGraphExec, mcGraphNode_t node,
                                            const struct mcMemcpy3DParms *pNodeParams)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphExecMemcpyNodeSetParams(hGraphExec, node, pNodeParams));
}

mcError_t wcudaGraphExecMemcpyNodeSetParams1D(mcGraphExec_t hGraphExec, mcGraphNode_t node,
                                              void *dst, const void *src, size_t count,
                                              mcMemcpyKind kind)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphExecMemcpyNodeSetParams1D(hGraphExec, node, dst, src, count, kind));
}

mcError_t wcudaGraphExecMemcpyNodeSetParamsFromSymbol(mcGraphExec_t hGraphExec, mcGraphNode_t node,
                                                      void *dst, const void *symbol, size_t count,
                                                      size_t offset, mcMemcpyKind kind)
{
    WCUDA_RETURN(mcGraphExecMemcpyNodeSetParamsFromSymbol(hGraphExec, node, dst, symbol, count,
                                                          offset, kind));
}

mcError_t wcudaGraphExecMemcpyNodeSetParamsToSymbol(mcGraphExec_t hGraphExec, mcGraphNode_t node,
                                                    const void *symbol, const void *src,
                                                    size_t count, size_t offset, mcMemcpyKind kind)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(
        mcGraphExecMemcpyNodeSetParamsToSymbol(hGraphExec, node, symbol, src, count, offset, kind));
}

mcError_t wcudaGraphExecMemsetNodeSetParams(mcGraphExec_t hGraphExec, mcGraphNode_t node,
                                            const mcMemsetParams *pNodeParams)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphExecMemsetNodeSetParams(hGraphExec, node, pNodeParams));
}

mcError_t wcudaGraphExecUpdate(mcGraphExec_t hGraphExec, mcGraph_t hGraph,
                               mcGraphNode_t *hErrorNode_out,
                               mcGraphExecUpdateResult *updateResult_out)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphExecUpdate(hGraphExec, hGraph, hErrorNode_out, updateResult_out));
}

mcError_t
wcudaGraphExternalSemaphoresSignalNodeGetParams(mcGraphNode_t hNode,
                                                mcExternalSemaphoreSignalNodeParams *params_out)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaGraphExternalSemaphoresSignalNodeSetParams(
    mcGraphNode_t hNode, const mcExternalSemaphoreSignalNodeParams *nodeParams)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t
wcudaGraphExternalSemaphoresWaitNodeGetParams(mcGraphNode_t hNode,
                                              mcExternalSemaphoreWaitNodeParams *params_out)
{
    ACTIVE_CONTEXT();
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t
wcudaGraphExternalSemaphoresWaitNodeSetParams(mcGraphNode_t hNode,
                                              const mcExternalSemaphoreWaitNodeParams *nodeParams)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcErrorNotSupported);
}

mcError_t wcudaGraphGetEdges(mcGraph_t graph, mcGraphNode_t *from, mcGraphNode_t *to,
                             size_t *numEdges)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphGetEdges(graph, from, to, numEdges));
}

mcError_t wcudaGraphGetNodes(mcGraph_t graph, mcGraphNode_t *nodes, size_t *numNodes)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphGetNodes(graph, nodes, numNodes));
}

mcError_t wcudaGraphGetRootNodes(mcGraph_t graph, mcGraphNode_t *pRootNodes, size_t *pNumRootNodes)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphGetRootNodes(graph, pRootNodes, pNumRootNodes));
}

mcError_t wcudaGraphHostNodeGetParams(mcGraphNode_t node, mcHostNodeParams *pNodeParams)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphHostNodeGetParams(node, pNodeParams));
}

mcError_t wcudaGraphHostNodeSetParams(mcGraphNode_t node, const mcHostNodeParams *pNodeParams)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphHostNodeSetParams(node, pNodeParams));
}

mcError_t wcudaGraphInstantiate(mcGraphExec_t *pGraphExec, mcGraph_t graph,
                                mcGraphNode_t *pErrorNode, char *pLogBuffer, size_t bufferSize)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphInstantiate(pGraphExec, graph, pErrorNode, pLogBuffer, bufferSize));
}

mcError_t wcudaGraphInstantiateWithFlags(mcGraphExec_t *pGraphExec, mcGraph_t graph,
                                         unsigned long long flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphInstantiateWithFlags(pGraphExec, graph, flags));
}

mcError_t wcudaGraphInstantiateWithParams(mcGraphExec_t *pGraphExec, mcGraph_t graph,
                                          mcGraphInstantiateParams *instantiateParams)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphInstantiateWithParams(pGraphExec, graph, instantiateParams));
}

mcError_t wcudaGraphExecGetFlags(mcGraphExec_t graphExec, unsigned long long *flags)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphExecGetFlags(graphExec, flags));
}

mcError_t wcudaGraphKernelNodeCopyAttributes(mcGraphNode_t hSrc, mcGraphNode_t hDst)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphKernelNodeCopyAttributes(hSrc, hDst));
}

mcError_t wcudaGraphKernelNodeGetAttribute(mcGraphNode_t hNode, mcKernelNodeAttrID attr,
                                           mcKernelNodeAttrValue *value_out)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphKernelNodeGetAttribute(hNode, attr, value_out));
}

mcError_t wcudaGraphKernelNodeGetParams(mcGraphNode_t node, mcKernelNodeParams *pNodeParams)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphKernelNodeGetParams(node, pNodeParams));
}

mcError_t wcudaGraphKernelNodeSetAttribute(mcGraphNode_t hNode, mcKernelNodeAttrID attr,
                                           const mcKernelNodeAttrValue *value)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphKernelNodeSetAttribute(hNode, attr, value));
}

mcError_t wcudaGraphKernelNodeSetParams(mcGraphNode_t node, const mcKernelNodeParams *pNodeParams)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphKernelNodeSetParams(node, pNodeParams));
}

mcError_t wcudaGraphLaunch(mcGraphExec_t graphExec, mcStream_t stream)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphLaunch(graphExec, stream));
}

mcError_t wcudaGraphMemAllocNodeGetParams(mcGraphNode_t node,
                                          struct mcMemAllocNodeParams *params_out)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphMemAllocNodeGetParams(node, params_out));
}

mcError_t wcudaGraphMemFreeNodeGetParams(mcGraphNode_t node, void *dptr_out)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphMemFreeNodeGetParams(node, dptr_out));
}

mcError_t wcudaGraphMemcpyNodeGetParams(mcGraphNode_t node, struct mcMemcpy3DParms *pNodeParams)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphMemcpyNodeGetParams(node, pNodeParams));
}

mcError_t wcudaGraphMemcpyNodeSetParams(mcGraphNode_t node,
                                        const struct mcMemcpy3DParms *pNodeParams)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphMemcpyNodeSetParams(node, pNodeParams));
}

mcError_t wcudaGraphMemcpyNodeSetParams1D(mcGraphNode_t node, void *dst, const void *src,
                                          size_t count, mcMemcpyKind kind)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphMemcpyNodeSetParams1D(node, dst, src, count, kind));
}

mcError_t wcudaGraphMemcpyNodeSetParamsFromSymbol(mcGraphNode_t node, void *dst, const void *symbol,
                                                  size_t count, size_t offset, mcMemcpyKind kind)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphMemcpyNodeSetParamsFromSymbol(node, dst, symbol, count, offset, kind));
}

mcError_t wcudaGraphMemcpyNodeSetParamsToSymbol(mcGraphNode_t node, const void *symbol,
                                                const void *src, size_t count, size_t offset,
                                                mcMemcpyKind kind)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphMemcpyNodeSetParamsToSymbol(node, symbol, src, count, offset, kind));
}

mcError_t wcudaGraphMemsetNodeGetParams(mcGraphNode_t node, mcMemsetParams *pNodeParams)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphMemsetNodeGetParams(node, pNodeParams));
}

mcError_t wcudaGraphMemsetNodeSetParams(mcGraphNode_t node, const mcMemsetParams *pNodeParams)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphMemsetNodeSetParams(node, pNodeParams));
}

mcError_t wcudaGraphNodeFindInClone(mcGraphNode_t *pNode, mcGraphNode_t originalNode,
                                    mcGraph_t clonedGraph)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphNodeFindInClone(pNode, originalNode, clonedGraph));
}

mcError_t wcudaGraphNodeGetDependencies(mcGraphNode_t node, mcGraphNode_t *pDependencies,
                                        size_t *pNumDependencies)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphNodeGetDependencies(node, pDependencies, pNumDependencies));
}

mcError_t wcudaGraphNodeGetDependentNodes(mcGraphNode_t node, mcGraphNode_t *pDependentNodes,
                                          size_t *pNumDependentNodes)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphNodeGetDependentNodes(node, pDependentNodes, pNumDependentNodes));
}

mcError_t wcudaGraphNodeGetEnabled(mcGraphExec_t hGraphExec, mcGraphNode_t hNode,
                                   unsigned int *isEnabled)
{
    ACTIVE_CONTEXT();
    WCUDA_RETURN(mcGraphNodeGetEnabled(hGraphExec, hNode, isEnabled));
}

mcError_t wcudaGraphNodeGetType(mcGraphNode_t node, mcGraphNodeType *pType)
{
    WCUDA_RETURN(mcGraphNodeGetType(node, pType));
}

mcError_t wcudaGraphNodeSetEnabled(mcGraphExec_t hGraphExec, mcGraphNode_t hNode,
                                   unsigned int isEnabled)
{
    WCUDA_RETURN(mcGraphNodeSetEnabled(hGraphExec, hNode, isEnabled));
}

mcError_t wcudaGraphReleaseUserObject(mcGraph_t graph, mcUserObject_t object, unsigned int count)
{
    WCUDA_RETURN(mcGraphReleaseUserObject(graph, object, count));
}

mcError_t wcudaGraphRemoveDependencies(mcGraph_t graph, const mcGraphNode_t *from,
                                       const mcGraphNode_t *to, size_t numDependencies)
{
    WCUDA_RETURN(mcGraphRemoveDependencies(graph, from, to, numDependencies));
}

mcError_t wcudaGraphRetainUserObject(mcGraph_t graph, mcUserObject_t object, unsigned int count,
                                     unsigned int flags)
{
    WCUDA_RETURN(mcGraphRetainUserObject(graph, object, count, flags));
}

mcError_t wcudaGraphUpload(mcGraphExec_t graphExec, mcStream_t stream)
{
    WCUDA_RETURN(mcGraphUpload(graphExec, stream));
}

mcError_t wcudaUserObjectCreate(mcUserObject_t *object_out, void *ptr, mcHostFn_t destroy,
                                unsigned int initialRefcount, unsigned int flags)
{
    WCUDA_RETURN(mcUserObjectCreate(object_out, ptr, destroy, initialRefcount, flags));
}

mcError_t wcudaUserObjectRelease(mcUserObject_t object, unsigned int count)
{
    WCUDA_RETURN(mcUserObjectRelease(object, count));
}

mcError_t wcudaUserObjectRetain(mcUserObject_t object, unsigned int count)
{
    WCUDA_RETURN(mcUserObjectRetain(object, count));
}

//---------------------------------------------------------------------------//
// Graphics Interoperability
//---------------------------------------------------------------------------//

mcError_t wcudaGraphicsMapResources(int count, mcGraphicsResource_t *resources, mcStream_t stream)
{
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaGraphicsResourceGetMappedMipmappedArray(mcMipmappedArray_t *mipmappedArray,
                                                       mcGraphicsResource_t resource)
{
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaGraphicsResourceGetMappedPointer(void **devPtr, size_t *size,
                                                mcGraphicsResource_t resource)
{
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaGraphicsResourceSetMapFlags(mcGraphicsResource_t resource, unsigned int flags)
{
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaGraphicsSubResourceGetMappedArray(mcArray_t *array, mcGraphicsResource_t resource,
                                                 unsigned int arrayIndex, unsigned int mipLevel)
{
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaGraphicsUnmapResources(int count, mcGraphicsResource_t *resources, mcStream_t stream)
{
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaGraphicsUnregisterResource(mcGraphicsResource_t resource)
{
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

//---------------------------------------------------------------------------//
//  Texture Object Management
//---------------------------------------------------------------------------//
mcError_t wcudaGetChannelDesc(mcChannelFormatDesc *desc, mcArray_const_t array)
{
    WCUDA_RETURN(mcGetChannelDesc(desc, array));
}

mcChannelFormatDesc wcudaCreateChannelDesc(int x, int y, int z, int w,
                                           enum mcChannelFormatKind_enum f)
{
    // FIX ME: add args check according to api case result.
    return mcCreateChannelDesc(x, y, z, w, f);
}

mcError_t wcudaCreateTextureObject(mcTextureObject_t *pTexObject,
                                   const struct mcResourceDesc *pResDesc,
                                   const struct mcTextureDesc *pTexDesc,
                                   const struct mcResourceViewDesc *pResViewDesc)
{
    WCUDA_RETURN(mcCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc));
}

mcError_t wcudaDestroyTextureObject(mcTextureObject_t texObject)
{
    WCUDA_RETURN(mcDestroyTextureObject(texObject));
}

mcError_t wcudaGetTextureObjectResourceDesc(struct mcResourceDesc *pResDesc,
                                            mcTextureObject_t texObject)
{
    WCUDA_RETURN(mcGetTextureObjectResourceDesc(pResDesc, texObject));
}

mcError_t wcudaGetTextureObjectTextureDesc(struct mcTextureDesc *pTexDesc,
                                           mcTextureObject_t texObject)
{
    WCUDA_RETURN(mcGetTextureObjectTextureDesc(pTexDesc, texObject));
}

mcError_t wcudaGetTextureObjectResourceViewDesc(struct mcResourceViewDesc *pResViewDesc,
                                                mcTextureObject_t texObject)
{
    WCUDA_RETURN(mcGetTextureObjectResourceViewDesc(pResViewDesc, texObject));
}

//---------------------------------------------------------------------------//
//  Texture Reference Management [DEPRECATED]
//---------------------------------------------------------------------------//
mcError_t wcudaBindTexture(size_t *offset, const struct textureReference *texref,
                           const void *devPtr, const mcChannelFormatDesc *desc, size_t size)
{
    WCUDA_RETURN(mcBindTexture(offset, texref, devPtr, desc, size));
}

mcError_t wcudaBindTexture2D(size_t *offset, const struct textureReference *texref,
                             const void *devPtr, const mcChannelFormatDesc *desc, size_t width,
                             size_t height, size_t pitch)
{
    WCUDA_RETURN(mcBindTexture2D(offset, texref, devPtr, desc, width, height, pitch));
}

mcError_t wcudaBindTextureToArray(const struct textureReference *texref, mcArray_const_t array,
                                  const mcChannelFormatDesc *desc)
{
    WCUDA_RETURN(mcBindTextureToArray(texref, array, desc));
}

mcError_t wcudaBindTextureToMipmappedArray(const struct textureReference *texref,
                                           mcMipmappedArray_const_t mipmappedArray,
                                           const mcChannelFormatDesc *desc)
{
    WCUDA_RETURN(mcErrorNotSupported);
}

mcError_t wcudaUnbindTexture(const struct textureReference *texref)
{
    WCUDA_RETURN(mcUnbindTexture(texref));
}

mcError_t wcudaGetTextureAlignmentOffset(size_t *offset, const struct textureReference *texref)
{
    WCUDA_RETURN(mcErrorNotSupported);
}

mcError_t wcudaGetTextureReference(const struct textureReference **texref, const void *symbol)
{
    WCUDA_RETURN(mcErrorNotSupported);
}

mcError_t wcudaGetMipmappedArrayLevel(mcArray_t *levelArray,
                                      mcMipmappedArray_const_t mipmappedArray, unsigned int level)
{
    WCUDA_RETURN(mcErrorNotSupported);
}

mcError_t wcudaGetExportTable(const void **ppExportTable, const mcUuid_t *pExportTableId)
{
    /* TODO:wcuda_syqin */
    WCUDA_RETURN(mcErrorNotYetImplemented);
}

mcError_t wcudaBindSurfaceToArray(const struct surfaceReference *surfref, mcArray_const_t array,
                                  const mcChannelFormatDesc *desc)
{
    WCUDA_RETURN(mcErrorNotSupported);
}

mcError_t wcudaGetSurfaceReference(const struct surfaceReference **surfref, const void *symbol)
{
    WCUDA_RETURN(mcErrorNotSupported);
}

mcError_t wcudaCreateSurfaceObject(mcSurfaceObject_t *pSurfObject,
                                   const struct mcResourceDesc *pResDesc)
{
    WCUDA_RETURN(mcErrorNotSupported);
}

mcError_t wcudaDestroySurfaceObject(mcSurfaceObject_t surfObject)
{
    WCUDA_RETURN(mcErrorNotSupported);
}

mcError_t wcudaGetSurfaceObjectResourceDesc(struct mcResourceDesc *pResDesc,
                                            mcSurfaceObject_t surfObject)
{
    WCUDA_RETURN(mcErrorNotSupported);
}
